Run download_data.Rmd and
percentage_of_regional_richness.Rmd First!
library(randomForest)
randomForest 4.6-14
Type rfNews() to see new features/changes/bug fixes.
library(reshape2)
library(rpart)
library(ggplot2)
Attaching package: ‘ggplot2’
The following object is masked from ‘package:randomForest’:
margin
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ tibble 3.1.8 ✔ dplyr 1.0.7
✔ tidyr 1.1.3 ✔ stringr 1.4.0
✔ readr 1.4.0 ✔ forcats 0.5.1
✔ purrr 0.3.4
Warning: package ‘tibble’ was built under R version 4.1.2── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::combine() masks randomForest::combine()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
✖ ggplot2::margin() masks randomForest::margin()
library(multcomp)
Loading required package: mvtnorm
Loading required package: survival
city_data
length(city_data$city_gdp_per_population[!is.na(city_data$city_gdp_per_population)])
[1] 30
length(city_data$percentage_urban_area_as_open_public_spaces[!is.na(city_data$percentage_urban_area_as_open_public_spaces)])
[1] 61
length(city_data$happiness_future_life[!is.na(city_data$happiness_future_life)])
[1] 65
length(city_data$mean_population_exposure_to_pm2_5_2019[!is.na(city_data$mean_population_exposure_to_pm2_5_2019)])
[1] 131
fetch_city_data_for <- function(pool_name, include_city_name = F) {
results_filename <- paste(paste('percentage_of_regional_richness__output_', pool_name, 'city', 'richness', 'intercept', sep = "_"), "csv", sep = ".")
results <- read_csv(results_filename)
joined <- left_join(city_data, results)
joined$abs_city_centre_latitude = abs(joined$city_centre_latitude)
required_columns <- c("population_growth", "rainfall_monthly_min", "rainfall_annual_average", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "temperature_monthly_max", "happiness_negative_effect", "happiness_positive_effect", "happiness_future_life", "number_of_biomes", "realm", "biome_name", "region_20km_includes_estuary", "region_50km_includes_estuary", "region_100km_includes_estuary", "city_includes_estuary", "region_20km_average_pop_density", "region_50km_average_pop_density", "region_100km_average_pop_density", "city_max_pop_density", "city_average_pop_density", "mean_population_exposure_to_pm2_5_2019", "region_20km_cultivated", "region_20km_urban", "region_50km_cultivated", "region_50km_urban", "region_100km_cultivated", "region_100km_urban", "region_20km_elevation_delta", "region_20km_mean_elevation", "region_50km_elevation_delta", "region_50km_mean_elevation", "region_100km_elevation_delta", "region_100km_mean_elevation", "city_elevation_delta", "city_mean_elevation", "urban", "shrubs", "permanent_water", "open_forest", "herbaceous_wetland", "herbaceous_vegetation", "cultivated", "closed_forest", "share_of_population_within_400m_of_open_space", "percentage_urban_area_as_streets", "percentage_urban_area_as_open_public_spaces_and_streets", "percentage_urban_area_as_open_public_spaces", "city_gdp_per_population", "city_ndvi", "city_ssm", "city_susm", "region_20km_ndvi", "region_20km_ssm", "region_20km_susm", "region_50km_ndvi", "region_50km_ssm", "region_50km_susm", "region_100km_ndvi", "region_100km_ssm", "region_100km_susm", "city_percentage_protected", "region_20km_percentage_protected", "region_50km_percentage_protected", "region_100km_percentage_protected", "city_centre_latitude", "abs_city_centre_latitude")
if (include_city_name) {
required_columns <- append(c("name"), required_columns)
}
required_columns <- append(c("response"), required_columns)
joined[,required_columns]
}
| Merlin Response |
merlin_city_data <- fetch_city_data_for('merlin')
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
merlin_city_data
merlin_city_data_fixed <- rfImpute(response ~ ., merlin_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.68 120.25 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.57 119.65 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.74 120.63 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 20.74 115.04 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.27 117.97 |
merlin_city_data_fixed
ggplot(merlin_city_data_fixed, aes(response)) + geom_histogram(binwidth = 2)
names(merlin_city_data_fixed)
[1] "response" "population_growth" "rainfall_monthly_min"
[4] "rainfall_annual_average" "rainfall_monthly_max" "temperature_annual_average"
[7] "temperature_monthly_min" "temperature_monthly_max" "happiness_negative_effect"
[10] "happiness_positive_effect" "happiness_future_life" "number_of_biomes"
[13] "realm" "biome_name" "region_20km_includes_estuary"
[16] "region_50km_includes_estuary" "region_100km_includes_estuary" "city_includes_estuary"
[19] "region_20km_average_pop_density" "region_50km_average_pop_density" "region_100km_average_pop_density"
[22] "city_max_pop_density" "city_average_pop_density" "mean_population_exposure_to_pm2_5_2019"
[25] "region_20km_cultivated" "region_20km_urban" "region_50km_cultivated"
[28] "region_50km_urban" "region_100km_cultivated" "region_100km_urban"
[31] "region_20km_elevation_delta" "region_20km_mean_elevation" "region_50km_elevation_delta"
[34] "region_50km_mean_elevation" "region_100km_elevation_delta" "region_100km_mean_elevation"
[37] "city_elevation_delta" "city_mean_elevation" "urban"
[40] "shrubs" "permanent_water" "open_forest"
[43] "herbaceous_wetland" "herbaceous_vegetation" "cultivated"
[46] "closed_forest" "share_of_population_within_400m_of_open_space" "percentage_urban_area_as_streets"
[49] "percentage_urban_area_as_open_public_spaces_and_streets" "percentage_urban_area_as_open_public_spaces" "city_gdp_per_population"
[52] "city_ndvi" "city_ssm" "city_susm"
[55] "region_20km_ndvi" "region_20km_ssm" "region_20km_susm"
[58] "region_50km_ndvi" "region_50km_ssm" "region_50km_susm"
[61] "region_100km_ndvi" "region_100km_ssm" "region_100km_susm"
[64] "city_percentage_protected" "region_20km_percentage_protected" "region_50km_percentage_protected"
[67] "region_100km_percentage_protected" "city_centre_latitude" "abs_city_centre_latitude"
merlin_response <- merlin_city_data_fixed$response
merlin_predictors <- merlin_city_data_fixed[,-1]
merlin_predictors
merlin_interp <- VSURF(x = merlin_predictors, y = merlin_response)
Thresholding step
Estimated computational time (on one core): 117.9 sec.
|
| | 0%
|
|==== | 2%
|
|======= | 4%
|
|=========== | 6%
|
|============== | 8%
|
|================== | 10%
|
|===================== | 12%
|
|========================= | 14%
|
|============================= | 16%
|
|================================ | 18%
|
|==================================== | 20%
|
|======================================= | 22%
|
|=========================================== | 24%
|
|=============================================== | 26%
|
|================================================== | 28%
|
|====================================================== | 30%
|
|========================================================= | 32%
|
|============================================================= | 34%
|
|================================================================ | 36%
|
|==================================================================== | 38%
|
|======================================================================== | 40%
|
|=========================================================================== | 42%
|
|=============================================================================== | 44%
|
|================================================================================== | 46%
|
|====================================================================================== | 48%
|
|========================================================================================== | 50%
|
|============================================================================================= | 52%
|
|================================================================================================= | 54%
|
|==================================================================================================== | 56%
|
|======================================================================================================== | 58%
|
|=========================================================================================================== | 60%
|
|=============================================================================================================== | 62%
|
|=================================================================================================================== | 64%
|
|====================================================================================================================== | 66%
|
|========================================================================================================================== | 68%
|
|============================================================================================================================= | 70%
|
|================================================================================================================================= | 72%
|
|==================================================================================================================================== | 74%
|
|======================================================================================================================================== | 76%
|
|============================================================================================================================================ | 78%
|
|=============================================================================================================================================== | 80%
|
|=================================================================================================================================================== | 82%
|
|====================================================================================================================================================== | 84%
|
|========================================================================================================================================================== | 86%
|
|============================================================================================================================================================== | 88%
|
|================================================================================================================================================================= | 90%
|
|===================================================================================================================================================================== | 92%
|
|======================================================================================================================================================================== | 94%
|
|============================================================================================================================================================================ | 96%
|
|=============================================================================================================================================================================== | 98%
|
|===================================================================================================================================================================================| 100%
Interpretation step (on 44 variables)
Estimated computational time (on one core): between 56.1 sec. and 370.7 sec.
|
| | 0%
|
|==== | 2%
|
|======== | 5%
|
|============ | 7%
|
|================ | 9%
|
|==================== | 11%
|
|======================== | 14%
|
|============================ | 16%
|
|================================= | 18%
|
|===================================== | 20%
|
|========================================= | 23%
|
|============================================= | 25%
|
|================================================= | 27%
|
|===================================================== | 30%
|
|========================================================= | 32%
|
|============================================================= | 34%
|
|================================================================= | 36%
|
|===================================================================== | 39%
|
|========================================================================= | 41%
|
|============================================================================= | 43%
|
|================================================================================= | 45%
|
|===================================================================================== | 48%
|
|========================================================================================== | 50%
|
|============================================================================================== | 52%
|
|================================================================================================== | 55%
|
|====================================================================================================== | 57%
|
|========================================================================================================== | 59%
|
|============================================================================================================== | 61%
|
|================================================================================================================== | 64%
|
|====================================================================================================================== | 66%
|
|========================================================================================================================== | 68%
|
|============================================================================================================================== | 70%
|
|================================================================================================================================== | 73%
|
|====================================================================================================================================== | 75%
|
|========================================================================================================================================== | 77%
|
|============================================================================================================================================== | 80%
|
|================================================================================================================================================== | 82%
|
|======================================================================================================================================================= | 84%
|
|=========================================================================================================================================================== | 86%
|
|=============================================================================================================================================================== | 89%
|
|=================================================================================================================================================================== | 91%
|
|======================================================================================================================================================================= | 93%
|
|=========================================================================================================================================================================== | 95%
|
|=============================================================================================================================================================================== | 98%
|
|===================================================================================================================================================================================| 100%
Prediction step (on 3 variables)
Maximum estimated computational time (on one core): 3.9 sec.
|
| | 0%
|
|============================================================ | 33%
|
|======================================================================================================================= | 67%
|
|===================================================================================================================================================================================| 100%
names(merlin_predictors[,merlin_interp$varselect.interp])
[1] "abs_city_centre_latitude" "region_50km_elevation_delta" "biome_name"
| Birdlife Response |
birdlife_city_data <- fetch_city_data_for('birdlife')
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
birdlife_city_data
ggplot(birdlife_city_data, aes(response)) + geom_histogram(binwidth = 1)
birdlife_city_data_fixed <- rfImpute(response ~ ., birdlife_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.673 89.80 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.688 90.04 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.723 90.60 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.884 93.14 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.674 89.83 |
birdlife_city_data_fixed
names(birdlife_city_data_fixed)
[1] "response" "population_growth" "rainfall_monthly_min"
[4] "rainfall_annual_average" "rainfall_monthly_max" "temperature_annual_average"
[7] "temperature_monthly_min" "temperature_monthly_max" "happiness_negative_effect"
[10] "happiness_positive_effect" "happiness_future_life" "number_of_biomes"
[13] "realm" "biome_name" "region_20km_includes_estuary"
[16] "region_50km_includes_estuary" "region_100km_includes_estuary" "city_includes_estuary"
[19] "region_20km_average_pop_density" "region_50km_average_pop_density" "region_100km_average_pop_density"
[22] "city_max_pop_density" "city_average_pop_density" "mean_population_exposure_to_pm2_5_2019"
[25] "region_20km_cultivated" "region_20km_urban" "region_50km_cultivated"
[28] "region_50km_urban" "region_100km_cultivated" "region_100km_urban"
[31] "region_20km_elevation_delta" "region_20km_mean_elevation" "region_50km_elevation_delta"
[34] "region_50km_mean_elevation" "region_100km_elevation_delta" "region_100km_mean_elevation"
[37] "city_elevation_delta" "city_mean_elevation" "urban"
[40] "shrubs" "permanent_water" "open_forest"
[43] "herbaceous_wetland" "herbaceous_vegetation" "cultivated"
[46] "closed_forest" "share_of_population_within_400m_of_open_space" "percentage_urban_area_as_streets"
[49] "percentage_urban_area_as_open_public_spaces_and_streets" "percentage_urban_area_as_open_public_spaces" "city_gdp_per_population"
[52] "city_ndvi" "city_ssm" "city_susm"
[55] "region_20km_ndvi" "region_20km_ssm" "region_20km_susm"
[58] "region_50km_ndvi" "region_50km_ssm" "region_50km_susm"
[61] "region_100km_ndvi" "region_100km_ssm" "region_100km_susm"
[64] "city_percentage_protected" "region_20km_percentage_protected" "region_50km_percentage_protected"
[67] "region_100km_percentage_protected" "city_centre_latitude" "abs_city_centre_latitude"
birdlife_response <- birdlife_city_data_fixed$response
birdlife_predictors <- birdlife_city_data_fixed[,-1]
birdlife_predictors
birdlife_interp <- VSURF(x = birdlife_predictors, y = birdlife_response)
Thresholding step
Estimated computational time (on one core): 105.8 sec.
|
| | 0%
|
|==== | 2%
|
|======= | 4%
|
|=========== | 6%
|
|============== | 8%
|
|================== | 10%
|
|===================== | 12%
|
|========================= | 14%
|
|============================= | 16%
|
|================================ | 18%
|
|==================================== | 20%
|
|======================================= | 22%
|
|=========================================== | 24%
|
|=============================================== | 26%
|
|================================================== | 28%
|
|====================================================== | 30%
|
|========================================================= | 32%
|
|============================================================= | 34%
|
|================================================================ | 36%
|
|==================================================================== | 38%
|
|======================================================================== | 40%
|
|=========================================================================== | 42%
|
|=============================================================================== | 44%
|
|================================================================================== | 46%
|
|====================================================================================== | 48%
|
|========================================================================================== | 50%
|
|============================================================================================= | 52%
|
|================================================================================================= | 54%
|
|==================================================================================================== | 56%
|
|======================================================================================================== | 58%
|
|=========================================================================================================== | 60%
|
|=============================================================================================================== | 62%
|
|=================================================================================================================== | 64%
|
|====================================================================================================================== | 66%
|
|========================================================================================================================== | 68%
|
|============================================================================================================================= | 70%
|
|================================================================================================================================= | 72%
|
|==================================================================================================================================== | 74%
|
|======================================================================================================================================== | 76%
|
|============================================================================================================================================ | 78%
|
|=============================================================================================================================================== | 80%
|
|=================================================================================================================================================== | 82%
|
|====================================================================================================================================================== | 84%
|
|========================================================================================================================================================== | 86%
|
|============================================================================================================================================================== | 88%
|
|================================================================================================================================================================= | 90%
|
|===================================================================================================================================================================== | 92%
|
|======================================================================================================================================================================== | 94%
|
|============================================================================================================================================================================ | 96%
|
|=============================================================================================================================================================================== | 98%
|
|===================================================================================================================================================================================| 100%
Interpretation step (on 48 variables)
Estimated computational time (on one core): between 67.2 sec. and 421.2 sec.
|
| | 0%
|
|==== | 2%
|
|======= | 4%
|
|=========== | 6%
|
|=============== | 8%
|
|=================== | 10%
|
|====================== | 12%
|
|========================== | 15%
|
|============================== | 17%
|
|================================== | 19%
|
|===================================== | 21%
|
|========================================= | 23%
|
|============================================= | 25%
|
|================================================ | 27%
|
|==================================================== | 29%
|
|======================================================== | 31%
|
|============================================================ | 33%
|
|=============================================================== | 35%
|
|=================================================================== | 38%
|
|======================================================================= | 40%
|
|=========================================================================== | 42%
|
|============================================================================== | 44%
|
|================================================================================== | 46%
|
|====================================================================================== | 48%
|
|========================================================================================== | 50%
|
|============================================================================================= | 52%
|
|================================================================================================= | 54%
|
|===================================================================================================== | 56%
|
|======================================================================================================== | 58%
|
|============================================================================================================ | 60%
|
|================================================================================================================ | 62%
|
|==================================================================================================================== | 65%
|
|======================================================================================================================= | 67%
|
|=========================================================================================================================== | 69%
|
|=============================================================================================================================== | 71%
|
|=================================================================================================================================== | 73%
|
|====================================================================================================================================== | 75%
|
|========================================================================================================================================== | 77%
|
|============================================================================================================================================== | 79%
|
|================================================================================================================================================= | 81%
|
|===================================================================================================================================================== | 83%
|
|========================================================================================================================================================= | 85%
|
|============================================================================================================================================================= | 88%
|
|================================================================================================================================================================ | 90%
|
|==================================================================================================================================================================== | 92%
|
|======================================================================================================================================================================== | 94%
|
|============================================================================================================================================================================ | 96%
|
|=============================================================================================================================================================================== | 98%
|
|===================================================================================================================================================================================| 100%
Prediction step (on 2 variables)
Maximum estimated computational time (on one core): 2.7 sec.
|
| | 0%
|
|========================================================================================== | 50%
|
|===================================================================================================================================================================================| 100%
names(birdlife_predictors[,birdlife_interp$varselect.interp])
[1] "population_growth" "region_50km_ssm"
| So…. |
|---|
| Merlin: “abs_city_centre_latitude” “region_50km_elevation_delta” “biome_name” “region_50km_ssm” [5] “region_100km_ssm” “region_20km_elevation_delta” “region_20km_urban” “region_100km_elevation_delta” [9] “temperature_annual_average” “city_ndvi” “city_gdp_per_population” “shrubs” [13] “permanent_water” “city_centre_latitude” “region_20km_cultivated” Birdlife: “population_growth” “region_50km_ssm” |
merlin_city_data_named <- fetch_city_data_for('merlin', T)
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
birdlife_city_data_named <- fetch_city_data_for('birdlife', T)
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
| Use cross validation and dropping terms to find best model |
full model: response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated
merlin_city_data_fixed_no_boreal <- merlin_city_data_fixed[merlin_city_data_fixed$biome_name != 'Boreal Forests/Taiga',]
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated))$delta[1]
[1] 19.9028
– CVE 19.72841 – Can we drop one?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.98386
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.55342
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.77626
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.85129
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.64188
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated))$delta[1]
[1] 19.47279
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated))$delta[1]
[1] 19.74441
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + permanent_water + city_centre_latitude + region_20km_cultivated))$delta[1]
[1] 19.77247
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + city_centre_latitude + region_20km_cultivated))$delta[1]
[1] 19.70816
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated))$delta[1]
[1] 19.59945
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude))$delta[1]
[1] 19.62638
– drop city_ndvi to give smaller CVE of 19.35 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.58278
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.14889
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.36927
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.22395
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.31519
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.35555
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.29791
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.18198
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.19893
– drop city_centre_latitude to give smaller CVE of 19.06 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.30999
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.87274
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.08238
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.14715
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.96295
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.02621
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.07168
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.01583
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.93091
– drop region_20km_elevation_delta to give smaller CVE of 18.76 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.97096
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.76006
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.92942
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.65642
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.72148
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.76699
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.70275
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.63424
– drop region_20km_cultivated to give smaller CVE of 18.54 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.77671
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.51919
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.70334
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + city_gdp_per_population + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.427
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.48533
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.54951
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.46802
– drop permanent_water to give smaller CVE of 18.34 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.53523
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.37394
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + temperature_annual_average + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.58989
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.27291
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.33385
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.40014
– drop temperature_annual_average to give smaller CVE of 18.14 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_urban + region_100km_elevation_delta + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.33645
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.13093
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.37243
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.14642
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + city_gdp_per_population, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.20601
– drop region_20km_urban to give smaller CVE of 18.03 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_elevation_delta + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.29319
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.22538
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.01086
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta + city_gdp_per_population, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.06183
– drop shrubs to give smaller CVE of 17.95 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_elevation_delta + city_gdp_per_population, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.18528
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + city_gdp_per_population, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.26039
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 17.94321
– drop city_gdp_per_population to give smaller CVE of 17.94 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_elevation_delta, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.06258
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.19515
– No
| – best model with region_100km_ssm + region_100km_elevation_delta (CV error 17.91216) |
summary(glm(data = merlin_city_data_fixed, formula = response ~ region_100km_ssm + region_100km_elevation_delta))
Call:
glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta,
data = merlin_city_data_fixed)
Deviance Residuals:
Min 1Q Median 3Q Max
-7.6625 -3.0246 -0.4496 1.9868 16.9640
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.6877517 1.1338124 2.371 0.0192 *
region_100km_ssm -0.1327133 0.0698210 -1.901 0.0595 .
region_100km_elevation_delta -0.0005261 0.0002944 -1.787 0.0762 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 17.46142)
Null deviance: 2469.6 on 136 degrees of freedom
Residual deviance: 2339.8 on 134 degrees of freedom
AIC: 785.57
Number of Fisher Scoring iterations: 2
reg_merlin = glm(data = merlin_city_data_fixed, formula = response ~ region_100km_ssm + region_100km_elevation_delta)
with(summary(reg_merlin), 1 - deviance/null.deviance)
[1] 0.05255
birdlife_city_data_fixed_no_boreal <- birdlife_city_data_fixed[birdlife_city_data_fixed$biome_name != 'Boreal Forests/Taiga',]
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_ssm + region_50km_elevation_delta + biome_name + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.899862
– can we drop a variable?
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm + region_50km_elevation_delta + biome_name + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.768164
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_elevation_delta + biome_name + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.752211
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_ssm + biome_name + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.989636
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_ssm + region_50km_elevation_delta + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.503421
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_ssm + region_50km_elevation_delta + biome_name, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.780578
– drop biome_name to give CVE of 6.503421 – can we drop another?
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm + region_50km_elevation_delta + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.417311
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_elevation_delta + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.426562
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_ssm + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.430742
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_ssm + region_50km_elevation_delta, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.439714
– drop region_100km_ssm to give CVE of 6.417311 – can we drop another?
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_elevation_delta + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.535285
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.342025
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm + region_50km_elevation_delta, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.352664
– drop region_50km_elevation_delta to give CVE of 6.342025 – can we drop another?
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.464699
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.291299
– drop city_gdp_per_population to give CVE of 6.291299 – is this better than no variable?
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ 1, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.395701
– yes, just!
| – so best model with birdlife is region_50km_ssm |
summary(glm(data = birdlife_city_data_fixed, formula = response ~ region_50km_ssm))
Call:
glm(formula = response ~ region_50km_ssm, data = birdlife_city_data_fixed)
Deviance Residuals:
Min 1Q Median 3Q Max
-4.5353 -1.5461 -0.4124 1.3071 10.7572
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.26916 0.65041 1.951 0.0531 .
region_50km_ssm -0.08499 0.04115 -2.065 0.0408 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 6.214378)
Null deviance: 865.45 on 136 degrees of freedom
Residual deviance: 838.94 on 135 degrees of freedom
AIC: 643.06
Number of Fisher Scoring iterations: 2
reg_birdlife = glm(data = birdlife_city_data_fixed, formula = response ~ region_50km_ssm)
with(summary(reg_birdlife), 1 - deviance/null.deviance)
[1] 0.03062471
ggplot(birdlife_city_data_named, aes(x = region_50km_ssm, y = response)) + geom_point() + geom_smooth(method = "glm")
| Check birdlife model fit |
birdlife.fit <- glm(data = birdlife_city_data_fixed, formula = response ~ region_50km_ssm)
summary(birdlife.fit)
Call:
glm(formula = response ~ region_50km_ssm, data = birdlife_city_data_fixed)
Deviance Residuals:
Min 1Q Median 3Q Max
-4.5353 -1.5461 -0.4124 1.3071 10.7572
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.26916 0.65041 1.951 0.0531 .
region_50km_ssm -0.08499 0.04115 -2.065 0.0408 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 6.214378)
Null deviance: 865.45 on 136 degrees of freedom
Residual deviance: 838.94 on 135 degrees of freedom
AIC: 643.06
Number of Fisher Scoring iterations: 2
with(summary(birdlife.fit), 1 - deviance/null.deviance)
[1] 0.03062471
plot(birdlife.fit)
birdlife_city_data_fixed_no_boreal[c(16, 53, 72), c("region_50km_ssm")]
[1] 18.451180 9.961682 11.644862
city_data[c(16, 53, 72), c("name", "region_50km_ssm")]
dat <- predict(glm(formula = response ~ region_50km_ssm, data = birdlife_city_data_named), se.fit=T)
outside_se <- birdlife_city_data_named[birdlife_city_data_named$response < dat$fit - 15* dat$se.fit | birdlife_city_data_named$response > dat$fit + 15 * dat$se.fit,]
birdlife_ssm = ggplot(birdlife_city_data_named, aes(x = region_50km_ssm, y = response)) +
geom_point(size=1, alpha = 0.2) +
geom_smooth(method = "glm") +
geom_text_repel(aes(label = name), data = outside_se, size = 3, nudge_y = 1, color = "red") +
geom_point(data = outside_se, color = "red") +
theme_bw() +
ylab("Standardized bird retention response") + xlab("Regional (50km) SSM")
birdlife_ssm
ggsave("city_effect_richness__output__birdlife.jpg")
Saving 7.29 x 4.51 in image
birdlife_city_data_named$residuals <- resid(birdlife.fit)
ggplot(birdlife_city_data_named, aes(y = response, x = residuals)) +
geom_point() +
geom_smooth(method = "lm", se = F) +
labs(title = "Birdlife", subtitle = paste("Correlation", cor(birdlife_city_data_named$residuals, birdlife_city_data_named$response))) +
theme_bw()
| Check Merlin model fit |
merlin.fit <- glm(data = merlin_city_data_named, formula = response ~ region_100km_ssm + region_50km_elevation_delta)
summary(merlin.fit)
Call:
glm(formula = response ~ region_100km_ssm + region_50km_elevation_delta,
data = merlin_city_data_named)
Deviance Residuals:
Min 1Q Median 3Q Max
-7.6599 -2.9987 -0.5524 1.7449 16.9143
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.6809304 1.1210300 2.391 0.0182 *
region_100km_ssm -0.1331207 0.0695604 -1.914 0.0578 .
region_50km_elevation_delta -0.0006899 0.0003461 -1.994 0.0482 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 17.36262)
Null deviance: 2469.6 on 136 degrees of freedom
Residual deviance: 2326.6 on 134 degrees of freedom
AIC: 784.8
Number of Fisher Scoring iterations: 2
with(summary(merlin.fit), 1 - deviance/null.deviance)
[1] 0.05791102
plot(merlin.fit)
merlin_city_data_fixed_no_boreal[c(24, 42, 72), c("region_100km_ssm", "region_50km_elevation_delta")]
city_data[c(24, 42, 72), c("name", "region_100km_ssm", "region_50km_elevation_delta")]
ggplot(merlin_city_data_named, aes(x = region_100km_ssm, y = response)) +
geom_point(aes(size = region_50km_elevation_delta)) +
geom_smooth(method = "glm") +
theme_bw() +
theme(legend.position="bottom") +
ylab("Standardized bird retention response") + xlab("SSM in 100 km surrounding city") + guides(size=guide_legend(title="Regional (50 km) elevation delta"))
ggsave("city_effect_richness__output__merlin.jpg")
Saving 7.29 x 4.51 in image
merlin_city_data_named$residuals <- resid(merlin.fit)
ggplot(merlin_city_data_named, aes(y = response, x = residuals)) +
geom_point() +
geom_smooth(method = "lm", se = F) +
labs(title = "Merlin", subtitle = paste("Correlation", cor(merlin_city_data_named$residuals, merlin_city_data_named$response))) +
theme_bw()
| Check AIC |
AIC(
glm(data = merlin_city_data_fixed, formula = response ~ region_100km_ssm + region_50km_ssm + region_50km_elevation_delta + biome_name + population_growth),
glm(data = merlin_city_data_fixed, formula = response ~ region_100km_ssm + region_50km_elevation_delta)
)
AIC(
glm(data = birdlife_city_data_fixed, formula = response ~ region_100km_ssm + region_50km_ssm + region_50km_elevation_delta + biome_name + population_growth),
glm(data = birdlife_city_data_fixed, formula = response ~ region_50km_ssm)
)
| LDG |
birdlife_data_with_lat = fetch_city_data_for('birdlife', include_city_name = T)
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
model2 <- glm(formula = response ~ I(city_centre_latitude^2), data = birdlife_data_with_lat)
dat2 <- predict(model2, se.fit=T)
summary(model2)
Call:
glm(formula = response ~ I(city_centre_latitude^2), data = birdlife_data_with_lat)
Deviance Residuals:
Min 1Q Median 3Q Max
-4.7461 -1.4878 -0.4638 1.2399 9.5839
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.4426687 0.3308024 -1.338 0.1831
I(city_centre_latitude^2) 0.0004780 0.0002725 1.754 0.0817 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 6.267835)
Null deviance: 865.45 on 136 degrees of freedom
Residual deviance: 846.16 on 135 degrees of freedom
AIC: 644.23
Number of Fisher Scoring iterations: 2
with(summary(model2), 1 - deviance/null.deviance)
[1] 0.02228615
outside_ldg_se <- birdlife_data_with_lat[birdlife_data_with_lat$response < dat2$fit - 15* dat2$se.fit | birdlife_data_with_lat$response > dat2$fit + 15 * dat2$se.fit,]
birdlife_ldg = ggplot(birdlife_data_with_lat, aes(x = city_centre_latitude, y = response)) +
geom_point(size=1, alpha = 0.2) +
geom_smooth(method = "glm", formula = y ~ I(x^2)) +
geom_text_repel(aes(label = name), data = outside_ldg_se, size = 3, nudge_y = 1, color = "red") +
geom_point(data = outside_ldg_se, color = "red") +
theme_bw() +
ylab("Standardized bird retention response") + xlab("Latitude (of city centre)") + labs(title = "Birdlife LDG")
birdlife_ldg
ggsave('city_effect_richness__output__ldg_birdlife.jpg')
Saving 7.29 x 4.51 in image
library(grid)
library(gridExtra)
birdlife_plot_ldg = ggplot(birdlife_data_with_lat, aes(x = city_centre_latitude, y = response)) +
geom_point(size=1, alpha = 0.2) +
geom_smooth(method = "glm", formula = y ~ I(x^2)) +
geom_text_repel(aes(label = name), data = outside_ldg_se, size = 3, nudge_y = 1, color = "red") +
geom_point(data = outside_ldg_se, aes(color = region_50km_ssm)) +
scale_colour_gradientn(colours = c("#ffee00", "#0019ff"), limits=c(5,22)) +
guides(colour=guide_legend(title="SSM in 50 km surrounding city")) +
theme_bw() +
ylab("Standardized bird retention response") + xlab("Latitude (of city centre)") +
theme(legend.position="bottom", legend.box = "horizontal")
birdlife_plot_ldg
outside_ssm_se <- birdlife_city_data_named[birdlife_city_data_named$response < dat$fit - 15* dat$se.fit | birdlife_city_data_named$response > dat$fit + 15 * dat$se.fit,]
birdlife_ssm2 = ggplot(birdlife_city_data_named, aes(x = region_50km_ssm, y = response)) +
geom_point(size=1, alpha = 0.2) +
geom_smooth(method = "glm") +
geom_text_repel(aes(label = name), data = outside_ssm_se, size = 3, nudge_y = 1, color = "red") +
geom_point(data = outside_ssm_se, aes(color = region_50km_ssm)) +
scale_colour_gradientn(colours = c("#ffee00", "#0019ff"), limits=c(5,22)) +
guides(colour=guide_legend(title="SSM in 50 km surrounding city")) +
theme_bw() +
ylab("Standardized bird retention response") + xlab("SSM in 50 km surrounding city") +
theme(legend.position="bottom", legend.box = "horizontal")
birdlife_ssm2
library(ggpubr)
birdlife_figure = ggarrange(birdlife_ssm2 + rremove("ylab"), birdlife_plot_ldg + rremove("ylab"), labels = c("a", "b"), ncol = 2, common.legend = T, legend = "bottom")
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
annotate_figure(birdlife_figure,
left = text_grob("Standardized bird retention response", rot = 90))
ggsave('city_effect_richness__output__ldg_birdlife_with_ssm.jpg', height = 1500, width = 2250, units = "px")
merlin_data_with_lat = fetch_city_data_for('merlin', include_city_name = T)
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
model2 <- glm(formula = response ~ I(city_centre_latitude^2), data = merlin_data_with_lat)
dat2 <- predict(model2, se.fit=T)
summary(model2)
Call:
glm(formula = response ~ I(city_centre_latitude^2), data = merlin_data_with_lat)
Deviance Residuals:
Min 1Q Median 3Q Max
-9.209 -2.825 -0.388 1.463 18.438
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.201e-02 5.651e-01 0.021 0.983
I(city_centre_latitude^2) -1.296e-05 4.655e-04 -0.028 0.978
(Dispersion parameter for gaussian family taken to be 18.29329)
Null deviance: 2469.6 on 136 degrees of freedom
Residual deviance: 2469.6 on 135 degrees of freedom
AIC: 790.97
Number of Fisher Scoring iterations: 2
with(summary(model2), 1 - deviance/null.deviance)
[1] 5.74457e-06
outside_se <- merlin_data_with_lat[merlin_data_with_lat$response < dat2$fit - 15* dat2$se.fit | merlin_data_with_lat$response > dat2$fit + 15 * dat2$se.fit,]
ggplot(merlin_data_with_lat, aes(x = city_centre_latitude, y = response)) +
geom_point(size=1, alpha = 0.2) +
geom_smooth(method = "glm", formula = y ~ I(x^2)) +
geom_text_repel(aes(label = name), data = outside_se, size = 3, nudge_y = 1, color = "red") +
geom_point(data = outside_se, color = "red") +
theme_bw() +
ylab("Standardized bird retention response") + xlab("Latitude (of city centre)")
ggsave('city_effect_richness__output__ldg_merlin.jpg')
Saving 7.29 x 4.51 in image